package 网页爬虫;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.PostMethod;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Small recursive crawler that mirrors a tree of online HTML pages (for example generated API
 * documentation) into a local directory.
 *
 * <p>Starting from one page, every double-quoted token in the page text that ends in
 * {@code .html}, contains no space and does not start with {@code http} is treated as a relative
 * link. The link is resolved against the directory of the page it was found in, downloaded into
 * the matching local sub-directory and scanned in turn. Links carrying a {@code #fragment} do not
 * end in {@code .html} and are therefore not followed.
 *
 * <p>The crawl is confined to the directory it started in: links that climb above the starting
 * URL (and would therefore be written above the starting local directory) are skipped.
 *
 * <p>Not thread-safe: the set of visited URLs is an unsynchronized static field.
 */
public class 下载在线API文档 {

    /** URLs already requested in this JVM; prevents re-downloads and endless loops on link cycles. */
    private static final Set<String> filenames = new HashSet<String>();

    /**
     * Fetches the body of {@code url} as text.
     *
     * @param url absolute URL to request
     * @return the response body when the server answers 200 OK; {@code null} for any other status,
     *         for an I/O failure, or when the HTTP library rejects the URL
     */
    public static String getSource(String url) {
        HttpClient client = new HttpClient();
        HttpMethod method = null;
        try {
            // NOTE(review): the page is requested with POST; a plain page fetch would normally
            // be a GET. Left unchanged here, confirm this is intended.
            method = new PostMethod(url);
            client.executeMethod(method);
            if (method.getStatusCode() == HttpStatus.SC_OK) {
                return method.getResponseBodyAsString();
            }
            return null;
        } catch (IOException e) {
            System.out.println("Get Source Error! " + url + " (" + e + ")");
            return null;
        } catch (IllegalArgumentException e) {
            // Per the HttpClient 3.x Javadoc a malformed URL is rejected with an unchecked
            // exception; one bad link must not abort the whole crawl.
            System.out.println("Get Source Error! " + url + " (" + e + ")");
            return null;
        } catch (IllegalStateException e) {
            // Same reasoning as above (unrecognised protocol in the URL).
            System.out.println("Get Source Error! " + url + " (" + e + ")");
            return null;
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
        }
    }

    /**
     * Downloads {@code url/cur} into {@code froot/cur} and then recursively mirrors every relative
     * {@code .html} link found in that page.
     *
     * <p>A page that was already requested, or that cannot be fetched, is silently skipped.
     *
     * @param url   URL of the directory containing the page; trailing slashes are ignored
     * @param froot local directory that mirrors {@code url}; created if missing
     * @param cur   file name of the page inside {@code url}
     */
    public static void create(String url, String froot, String cur) {
        crawl(url, new File(froot), cur, 0);
    }

    /**
     * Recursive worker behind {@link #create(String, String, String)}.
     *
     * @param depth number of directory levels {@code dir} lies below the directory the crawl
     *              started in; used to refuse {@code ..} links that would leave that directory
     */
    private static void crawl(String url, File dir, String cur, int depth) {
        // Without this a base such as ".../2.2/" yields ".../2.2//index.html" and makes the first
        // ".." merely drop the trailing slash instead of moving up one directory.
        String base = stripTrailingSlashes(url);
        String curUrl = base + "/" + cur;
        if (!filenames.add(curUrl)) {
            return; // already visited
        }
        String cont = getSource(curUrl);
        if (cont == null) {
            return;
        }
        save(new File(dir, cur), cont);

        // Attribute values are approximated by splitting the raw text on double quotes.
        for (String token : cont.split("\"")) {
            if (token.matches(".*\\.html$") && !token.startsWith("http") && !token.contains(" ")) {
                follow(base, dir, depth, token);
            }
        }
    }

    /** Resolves one relative link against the current page's directory and crawls its target. */
    private static void follow(String baseUrl, File baseDir, int baseDepth, String link) {
        // Root-relative ("/x.html") and protocol-relative ("//host/x.html") links cannot be mapped
        // onto a directory relative to the current page; a backslash could smuggle extra path
        // components into the local file name.
        if (link.startsWith("/") || link.indexOf('\\') != -1) {
            return;
        }
        // The query starts at the first '?'; only the path part names the file.
        int query = link.indexOf('?');
        String path = query == -1 ? link : link.substring(0, query);

        String targetUrl = baseUrl;
        File targetDir = baseDir;
        int depth = baseDepth;
        int slash;
        while ((slash = path.indexOf('/')) != -1) {
            String segment = path.substring(0, slash);
            path = path.substring(slash + 1);
            if (segment.equals("..")) {
                if (depth == 0) {
                    return; // would leave the directory the crawl started in
                }
                // depth > 0 guarantees both values were extended by this method earlier, so a
                // separator and a parent directory exist.
                targetUrl = targetUrl.substring(0, targetUrl.lastIndexOf('/'));
                targetDir = targetDir.getParentFile();
                depth--;
            } else if (segment.length() > 0 && !segment.equals(".")) {
                targetUrl = targetUrl + "/" + segment;
                targetDir = new File(targetDir, segment);
                depth++;
            }
            // "." and empty segments (from "//") refer to the current directory: nothing to do.
        }
        if (path.length() == 0 || path.equals(".") || path.equals("..")) {
            return; // nothing left that names a file
        }
        crawl(targetUrl, targetDir, path, depth);
    }

    /** Writes {@code content} to {@code target}, creating missing parent directories first. */
    private static void save(File target, String content) {
        File parent = target.getParentFile();
        if (parent != null && !parent.exists()) {
            // A failure here surfaces just below as a FileNotFoundException from the stream.
            parent.mkdirs();
        }
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(target);
            // NOTE(review): getBytes() encodes with the platform default charset, which may differ
            // from the charset the page was served in. Confirm whether that matters here.
            fos.write(content.getBytes());
            fos.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Removes trailing {@code /} characters but never eats into a {@code scheme://} prefix. */
    private static String stripTrailingSlashes(String url) {
        int end = url.length();
        while (end > 0 && url.charAt(end - 1) == '/' && !url.startsWith("://", end - 3)) {
            end--;
        }
        return url.substring(0, end);
    }

    /** Mirrors the MongoDB Java driver 2.2 API documentation into {@code F:\MongoDB\api}. */
    public static void main(String[] args) {
        String url = "http://api.mongodb.org/java/2.2/";
        String froot = "F:\\MongoDB\\api";
        create(url, froot, "index.html");
    }
}